
<!DOCTYPE html>

<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>tigramite.independence_tests.gsquared &#8212; Tigramite 5.2 documentation</title>
    <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
    <link rel="stylesheet" type="text/css" href="../../../_static/alabaster.css" />
    <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
    <script src="../../../_static/jquery.js"></script>
    <script src="../../../_static/underscore.js"></script>
    <script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
    <script src="../../../_static/doctools.js"></script>
    <link rel="index" title="Index" href="../../../genindex.html" />
    <link rel="search" title="Search" href="../../../search.html" />
   
  <link rel="stylesheet" href="../../../_static/custom.css" type="text/css" />
  
  
  <meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />

  </head><body>
  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          

          <div class="body" role="main">
            
  <h1>Source code for tigramite.independence_tests.gsquared</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Tigramite causal discovery for time series.&quot;&quot;&quot;</span>

<span class="c1"># Author: Sagar Nagaraj Simha, Jakob Runge &lt;jakob@jakob-runge.com&gt;</span>
<span class="c1">#</span>
<span class="c1"># License: GNU General Public License v3.0</span>

<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
<span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">special</span><span class="p">,</span> <span class="n">spatial</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>

<span class="kn">from</span> <span class="nn">scipy.stats</span> <span class="kn">import</span> <span class="n">chi2</span>
<span class="kn">from</span> <span class="nn">scipy.special</span> <span class="kn">import</span> <span class="n">xlogy</span>
<span class="kn">from</span> <span class="nn">scipy.stats.contingency</span> <span class="kn">import</span> <span class="n">crosstab</span>
<span class="kn">from</span> <span class="nn">scipy.stats.contingency</span> <span class="kn">import</span> <span class="n">expected_freq</span>
<span class="kn">from</span> <span class="nn">scipy.stats.contingency</span> <span class="kn">import</span> <span class="n">margins</span>
<span class="kn">from</span> <span class="nn">.independence_tests_base</span> <span class="kn">import</span> <span class="n">CondIndTest</span>

<div class="viewcode-block" id="Gsquared"><a class="viewcode-back" href="../../../index.html#tigramite.independence_tests.gsquared.Gsquared">[docs]</a><span class="k">class</span> <span class="nc">Gsquared</span><span class="p">(</span><span class="n">CondIndTest</span><span class="p">):</span>
<span class="w">    </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;G-squared conditional independence test for categorical data.</span>

<span class="sd">    Uses Chi2 as the null distribution and the method from [7]_ to</span>
<span class="sd">    adjust the degrees of freedom. Valid only asymptotically, recommended are</span>
<span class="sd">    above 1000-2000 samples (depends on data). For smaller sample sizes use the</span>
<span class="sd">    CMIsymb class which includes a local permutation test.</span>

<span class="sd">    Assumes one-dimensional X, Y.</span>

<span class="sd">    This method requires the scipy.stats package.</span>

<span class="sd">    Notes</span>
<span class="sd">    -----</span>
<span class="sd">    The general formula is</span>

<span class="sd">    .. math:: G(X;Y|Z) &amp;= 2 n \sum p(z)  \sum \sum  p(x,y|z) \log</span>
<span class="sd">                \frac{ p(x,y |z)}{p(x|z)\cdot p(y |z)}</span>

<span class="sd">    where :math:`n` is the sample size. This is simply :math:`2 n CMI(X;Y|Z)`.</span>

<span class="sd">    References</span>
<span class="sd">    ----------</span>

<span class="sd">    .. [7] Bishop, Y.M.M., Fienberg, S.E. and Holland, P.W. (1975) Discrete</span>
<span class="sd">           Multivariate Analysis: Theory and Practice. MIT Press, Cambridge.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    n_symbs : int, optional (default: None)</span>
<span class="sd">        Number of symbols in input data. Should be at least as large as the</span>
<span class="sd">        maximum array entry + 1. If None, n_symbs is inferred by scipy&#39;s crosstab</span>

<span class="sd">    **kwargs :</span>
<span class="sd">        Arguments passed on to parent class CondIndTest.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="nd">@property</span>
    <span class="k">def</span> <span class="nf">measure</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Concrete property to return the measure of the independence test</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_measure</span>
    
    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
                 <span class="n">n_symbs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
                 <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
        
        <span class="c1"># Setup the member variables</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_measure</span> <span class="o">=</span> <span class="s1">&#39;gsquared&#39;</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">n_symbs</span> <span class="o">=</span> <span class="n">n_symbs</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">two_sided</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">residual_based</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">recycle_residuals</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="n">CondIndTest</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>

        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbosity</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;n_symbs = </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_symbs</span><span class="p">)</span>
            <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span>

<div class="viewcode-block" id="Gsquared.get_dependence_measure"><a class="viewcode-back" href="../../../index.html#tigramite.independence_tests.gsquared.Gsquared.get_dependence_measure">[docs]</a>    <span class="k">def</span> <span class="nf">get_dependence_measure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">array</span><span class="p">,</span> <span class="n">xyz</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;Returns Gsquared/G-test test statistic.</span>

<span class="sd">        Parameters</span>
<span class="sd">        ----------</span>
<span class="sd">        array : array-like</span>
<span class="sd">            data array with X, Y, Z in rows and observations in columns.</span>

<span class="sd">        xyz : array of ints</span>
<span class="sd">            XYZ identifier array of shape (dim,).</span>

<span class="sd">        Returns</span>
<span class="sd">        -------</span>
<span class="sd">        val : float</span>
<span class="sd">            G-squared estimate.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="n">_</span><span class="p">,</span> <span class="n">T</span> <span class="o">=</span> <span class="n">array</span><span class="o">.</span><span class="n">shape</span>
        <span class="n">z_indices</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">xyz</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>

        <span class="c1"># Flip 2D-array so that order is ([zn...z0, ym...y0, xk...x0], T). The</span>
        <span class="c1"># contingency table is built in this order to ease creating subspaces</span>
        <span class="c1"># of Z=z.</span>
        <span class="n">array_flip</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">flipud</span><span class="p">(</span><span class="n">array</span><span class="p">)</span>

        <span class="c1"># When n_symbs is given, levels=range(0, n_symbs). If data does not</span>
        <span class="c1"># have a symbol in levels, then count=0 in the corresponding N-D</span>
        <span class="c1"># position of contingency table. If levels does not contain a certain</span>
        <span class="c1"># symbol that is present in the data, then the symbol from data is</span>
        <span class="c1"># ignored. If None, then levels are inferred from data (default).</span>

        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_symbs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">levels</span> <span class="o">=</span> <span class="kc">None</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">levels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">tile</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_symbs</span><span class="p">),</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">xyz</span><span class="p">),</span> <span class="mi">1</span><span class="p">))</span>  
            <span class="c1"># Assuming same list of levels for (z, y, x).</span>

        <span class="n">_</span><span class="p">,</span> <span class="n">observed</span> <span class="o">=</span> <span class="n">crosstab</span><span class="p">(</span><span class="o">*</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">array_flip</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">xyz</span><span class="p">),</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">((</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">T</span><span class="p">))),</span> <span class="n">levels</span><span class="o">=</span><span class="n">levels</span><span class="p">,</span>
                           <span class="n">sparse</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>

        <span class="n">observed_shape</span> <span class="o">=</span> <span class="n">observed</span><span class="o">.</span><span class="n">shape</span>

        <span class="n">gsquare</span> <span class="o">=</span> <span class="mf">0.0</span>
        <span class="n">dof</span> <span class="o">=</span> <span class="mi">0</span>

        <span class="c1"># The following loop is over the z-subspace to sum over the G-squared</span>
        <span class="c1"># statistic and count empty entries to adjust the degrees of freedom.</span>

        <span class="c1"># TODO: Can be further optimized to operate entirely on observed array</span>
        <span class="c1"># without &#39;for&#39;, to operate only within slice of z. sparse=True can</span>
        <span class="c1"># also optimize further.</span>

        <span class="c1"># For each permutation of z = (zn ... z1, z0). Example - (0...1,0,1)</span>
        <span class="k">for</span> <span class="n">zs</span> <span class="ow">in</span> <span class="n">np</span><span class="o">.</span><span class="n">ndindex</span><span class="p">(</span><span class="n">observed_shape</span><span class="p">[:</span><span class="nb">len</span><span class="p">(</span><span class="n">z_indices</span><span class="p">)]):</span>
            <span class="n">observedYX</span> <span class="o">=</span> <span class="n">observed</span><span class="p">[</span><span class="n">zs</span><span class="p">]</span>
            <span class="n">mY</span><span class="p">,</span> <span class="n">mX</span> <span class="o">=</span> <span class="n">margins</span><span class="p">(</span><span class="n">observedYX</span><span class="p">)</span>

            <span class="k">if</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">mY</span><span class="p">)</span><span class="o">!=</span><span class="mi">0</span><span class="p">):</span>
                <span class="n">expectedYX</span> <span class="o">=</span> <span class="n">expected_freq</span><span class="p">(</span><span class="n">observedYX</span><span class="p">)</span>
                <span class="n">gsquare</span> <span class="o">+=</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">xlogy</span><span class="p">(</span><span class="n">observedYX</span><span class="p">,</span> <span class="n">observedYX</span><span class="p">)</span> 
                                      <span class="o">-</span> <span class="n">xlogy</span><span class="p">(</span><span class="n">observedYX</span><span class="p">,</span> <span class="n">expectedYX</span><span class="p">))</span>

                <span class="c1"># Check how many rows and columns are all-zeros. i.e. how may</span>
                <span class="c1"># marginals are zero in expected-frq</span>
                <span class="n">nzero_rows</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="o">~</span><span class="n">expectedYX</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span> 
                <span class="n">nzero_cols</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="o">~</span><span class="n">expectedYX</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span>

                <span class="c1"># Compute dof. Reduce by 1 dof for every marginal row &amp; column=</span>
                <span class="c1"># 0 and add to global degrees of freedom [adapted from</span>
                <span class="c1"># Bishop, 1975].</span>
                <span class="n">cardYX</span> <span class="o">=</span> <span class="n">observedYX</span><span class="o">.</span><span class="n">shape</span>
                <span class="n">dof</span> <span class="o">+=</span> <span class="p">((</span><span class="n">cardYX</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">nzero_rows</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">cardYX</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">nzero_cols</span><span class="p">))</span>

        <span class="c1"># dof cannot be lesser than 1</span>
        <span class="n">dof</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">dof</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">_temp_dof</span> <span class="o">=</span> <span class="n">dof</span>
        <span class="k">return</span> <span class="n">gsquare</span></div>

<div class="viewcode-block" id="Gsquared.get_analytic_significance"><a class="viewcode-back" href="../../../index.html#tigramite.independence_tests.gsquared.Gsquared.get_analytic_significance">[docs]</a>    <span class="k">def</span> <span class="nf">get_analytic_significance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">xyz</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;Return the p_value of test statistic value &#39;value&#39;, according to a</span>
<span class="sd">           chi-square distribution with &#39;dof&#39; degrees of freedom.&quot;&quot;&quot;</span>
                      
        <span class="c1"># Calculate the p_value</span>
        <span class="n">p_value</span> <span class="o">=</span> <span class="n">chi2</span><span class="o">.</span><span class="n">sf</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_temp_dof</span><span class="p">)</span>
        <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">_temp_dof</span>

        <span class="k">return</span> <span class="n">p_value</span></div></div>


<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
    
    <span class="kn">import</span> <span class="nn">tigramite</span>
    <span class="kn">from</span> <span class="nn">tigramite.data_processing</span> <span class="kn">import</span> <span class="n">DataFrame</span>
    <span class="kn">import</span> <span class="nn">tigramite.data_processing</span> <span class="k">as</span> <span class="nn">pp</span>
    <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>

    <span class="n">seed</span><span class="o">=</span><span class="mi">42</span>
    <span class="n">random_state</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">default_rng</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
    <span class="n">cmi</span> <span class="o">=</span> <span class="n">Gsquared</span><span class="p">()</span>

    <span class="n">T</span> <span class="o">=</span> <span class="mi">1000</span>
    <span class="n">dimz</span> <span class="o">=</span> <span class="mi">3</span>
    <span class="n">z</span> <span class="o">=</span> <span class="n">random_state</span><span class="o">.</span><span class="n">binomial</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="n">dimz</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="n">dimz</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">T</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">T</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">T</span><span class="p">):</span>
        <span class="n">val</span> <span class="o">=</span> <span class="n">z</span><span class="p">[</span><span class="n">t</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
        <span class="n">prob</span> <span class="o">=</span> <span class="mf">0.2</span><span class="o">+</span><span class="n">val</span><span class="o">*</span><span class="mf">0.6</span>
        <span class="n">x</span><span class="p">[</span><span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">random_state</span><span class="o">.</span><span class="n">choice</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">],</span> <span class="n">p</span><span class="o">=</span><span class="p">[</span><span class="n">prob</span><span class="p">,</span> <span class="mf">1.</span><span class="o">-</span><span class="n">prob</span><span class="p">])</span>
        <span class="n">y</span><span class="p">[</span><span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">random_state</span><span class="o">.</span><span class="n">choice</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">p</span><span class="o">=</span><span class="p">[</span><span class="n">prob</span><span class="p">,</span> <span class="p">(</span><span class="mf">1.</span><span class="o">-</span><span class="n">prob</span><span class="p">)</span><span class="o">/</span><span class="mf">2.</span><span class="p">,</span> <span class="p">(</span><span class="mf">1.</span><span class="o">-</span><span class="n">prob</span><span class="p">)</span><span class="o">/</span><span class="mf">2.</span><span class="p">])</span>

    <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;start&#39;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">cmi</span><span class="o">.</span><span class="n">run_test_raw</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">z</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span>
    <span class="nb">print</span><span class="p">(</span><span class="n">cmi</span><span class="o">.</span><span class="n">run_test_raw</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">z</span><span class="o">=</span><span class="n">z</span><span class="p">))</span>
</pre></div>

          </div>
          
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="../../../index.html">Tigramite</a></h1>








<h3>Navigation</h3>

<div class="relations">
<h3>Related Topics</h3>
<ul>
  <li><a href="../../../index.html">Documentation overview</a><ul>
  <li><a href="../../index.html">Module code</a><ul>
  </ul></li>
  </ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
  <h3 id="searchlabel">Quick search</h3>
    <div class="searchformwrapper">
    <form class="search" action="../../../search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
      <input type="submit" value="Go" />
    </form>
    </div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>








        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <div class="footer">
      &copy;2023, Jakob Runge.
      
      |
      Powered by <a href="http://sphinx-doc.org/">Sphinx 5.0.2</a>
      &amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.12</a>
      
    </div>

    

    
  </body>
</html>